Setup

Set up environment and load in data

Fit to subject data: First RL then DDM

Parameter distributions

Distribution of converged values for each subject (25 histograms per parameter for each of the three parameters)

optim_out_path = paste0(cpueaters_path, 'ddModels/cluster_scripts/optim_out/fit1/')
subnums = c("01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "22", "23", "24", "25", "27")
data_prefix ="sub_data"
model = "model1c"  

ddm_fit_iters = data.frame()

for(i in 1:length(subnums)){
  cur_subnum = subnums[i]
  tmp = get_optim_out(model_=model, data_=paste0(data_prefix, cur_subnum), optim_out_path_=optim_out_path, iters_ = TRUE)
  tmp$subnum = cur_subnum
  ddm_fit_iters = rbind.all.columns(tmp, ddm_fit_iters)
}

ddm_fit_iters = ddm_fit_iters %>%
  rename(d = Param1, sigma = Param2, delta = Param3)
ddm_best_sub_ests = ddm_fit_iters %>% 
  filter(Iteration != 1) %>%
  group_by(subnum) %>%
  mutate(best_sub_est = ifelse(Result == min(Result), 1, 0)) %>%
  filter(best_sub_est == 1) %>%
  select(-Result,-Iteration,-kernel,-best_sub_est) %>%
  gather(key, value, -subnum)
p = ddm_fit_iters %>%
  filter(Iteration != 1) %>%
  select(-Result,-Iteration,-kernel) %>%
  gather(key, value, -subnum) %>%
  ggplot(aes(value))+
  geom_histogram(bins=50)+
  geom_vline(data=ddm_best_sub_ests, aes(xintercept=value), color="red")+
  facet_grid(subnum~key, scales="free")

fig_fn = 'ddm_model1c_fit1'
ggsave(file=paste0(fig_out_path, fig_fn, '_par_hists.jpg'), p, height = 11, width=8, units="in")

Distribution of best fitting parameter across subjects (one histogram per parameter with 25 values).

d and sigmas look in the recoverable space. When combined with these distributions for d and sigmas, delta also are in a more recoverable space. Still, this isn’t a full proof test. Ideal would be to estimate a full posterior to quantify the uncertainty around each estimate.

ddm_best_sub_ests %>%
  ggplot(aes(value))+
  geom_histogram(bins=30) +
  facet_wrap(~key, scales="free")

Fit to subject data: RL+DDM

Parameter distributions

optim_out_path = paste0(cpueaters_path, 'ddrlModels/cluster_scripts/optim_out/fit1/')
subnums = c("01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "22", "23", "24", "25", "27")
data_prefix ="sub_data"
model = "model1c"  

ddrl_fit_iters = data.frame()

for(i in 1:length(subnums)){
  cur_subnum = subnums[i]
  tmp = get_optim_out(model_=model, data_=paste0(data_prefix, cur_subnum), optim_out_path_=optim_out_path, iters_ = TRUE)
  tmp$subnum = cur_subnum
  ddrl_fit_iters = rbind.all.columns(tmp, ddrl_fit_iters)
}

ddrl_fit_iters = ddrl_fit_iters %>%
  rename(d = Param1, sigma = Param2, alpha = Param3, delta = Param4)
ddrl_best_sub_ests = ddrl_fit_iters %>% 
  filter(Iteration != 1) %>%
  group_by(subnum) %>%
  mutate(best_sub_est = ifelse(Result == min(Result), 1, 0)) %>%
  filter(best_sub_est == 1) %>%
  select(-Result,-Iteration,-kernel,-best_sub_est) %>%
  gather(key, value, -subnum)
p = ddrl_fit_iters %>%
  filter(Iteration != 1) %>%
  select(-Result,-Iteration,-kernel) %>%
  gather(key, value, -subnum) %>%
  ggplot(aes(value))+
  geom_histogram(bins=50)+
  geom_vline(data=ddrl_best_sub_ests, aes(xintercept=value), color="red")+
  facet_grid(subnum~key, scales="free")

fig_fn = 'ddrl_model1c_fit1'
ggsave(file=paste0(fig_out_path, fig_fn, '_par_hists.jpg'), p, height = 11, width=8, units="in")

Distribution of best fitting parameter across subjects (one histogram per parameter with 25 values). Compared to results above the deltas look very different and more often >1, which suggests an overweighting of fractal values, instead of the underweigting we observe in the data.

ddrl_best_sub_ests %>%
  ggplot(aes(value))+
  geom_histogram(bins=30) +
  facet_wrap(~key, scales="free")

Comparison of the two fits

Likelihoods

ddrl_fit_iters %>%
  filter(Iteration != 1) %>%
  select(Result, subnum) %>%
  mutate(model = "ddrl") %>%
  rbind(ddm_fit_iters %>%
          filter(Iteration != 1) %>%
          select(Result, subnum) %>%
          mutate(model = "ddm")) %>%
  ggplot(aes(Result, fill=model)) +
  geom_histogram(bins=30, alpha=.5, position="identity") +
  facet_wrap(~subnum, scales="free")+
  theme(legend.position = "bottom")+
  scale_fill_manual(values=cbbPalette[1:2])+
  labs(fill="")

Parameter estimates

MLE scatter plots

Drift rate and sigmas are similar across the two models but when fitting learning rates at the same time the probability distortion parameter delta is consistently higher and >1 (overestimation of fractal relevance).

ddm_best_sub_ests %>%
  mutate(model = "ddm") %>%
  rbind(ddrl_best_sub_ests %>%
          mutate(model = "ddrl")) %>%
  group_by(subnum) %>%
  spread(model, value) %>%
  drop_na() %>%
  ggplot(aes(ddm, ddrl))+
  geom_point()+
  geom_abline(aes(slope=1, intercept=0))+
  facet_wrap(~key, scales="free")

Distributions of all converged values

p = ddrl_fit_iters %>%
  filter(Iteration != 1) %>%
  select(d, sigma, delta, subnum) %>%
  mutate(model = "ddrl") %>%
  rbind(ddm_fit_iters %>%
          filter(Iteration != 1) %>%
          select(d, sigma, delta, subnum) %>%
          mutate(model = "ddm")) %>%
  gather(key, value, -subnum, -model) %>%
  ggplot(aes(value, fill=model)) +
  geom_histogram(bins=30, alpha=.5, position="identity") +
  facet_grid(subnum~key, scales="free")+
  theme(legend.position = "bottom")+
  scale_fill_manual(values=cbbPalette[1:2])+
  labs(fill="")

fig_fn = 'ddm_ddrl_model1c'
ggsave(file=paste0(fig_out_path, fig_fn, '_par_hist_comparison.jpg'), p, height = 11, width=8, units="in")

Comparison of ddrl alpha’s to hierarchical rl alpha’s

When estimating learning rates at the same time as the other ddm parameters they are consistently higher than those estimated from the the hierarchical fit. Note, however, that the hierarchical model used to estimate these alphas also included a two parameter (delta and gamma) probability distortion function that affected both the lottery and fractal values.

all_trials_data = read.csv('/Users/zeynepenkavi/Downloads/GTavares_2017_arbitration/behavioral_data/all_trials.csv')
all_trials_data %>%
  drop_na() %>%
  select(subnum, alpha) %>%
  distinct() %>%
  rename(rl = alpha) %>% 
  left_join(ddrl_best_sub_ests %>%
              filter(key == "alpha") %>%
              mutate(subnum = as.numeric(subnum)) %>%
              select(-key) %>%
              rename(ddrl = value), by="subnum") %>%
  ggplot(aes(rl, ddrl))+
  geom_point()+
  geom_abline(aes(slope=1, intercept=0))+
  labs("Comparison of learning rates from hierarchical vs. ddrl fit")

Posterior predictive checks

RL -> DDM

Simulate data using the best fitting parameters for each subject

Note: This model might not be capturing all features of the data well since (at least at the group level) we know there are effects, such as the faster choices at probFractalDraw == 1, that this model is not designed to capture.

ddm_best_sub_ests = ddm_best_sub_ests %>%
  spread(key, value)
source(paste0(helpers_path, 'ddModels/sim_task.R'))
source(paste0(helpers_path, 'ddModels/r_ddm_models/ddm_model1c.R'))
sim_trial_list = list()
sim_trial_list[['model1c']] = sim_trial
all_trials_stim_data = all_trials_data %>%
  select(subnum, probFractalDraw, leftQValue, rightQValue, rightLotteryEV, leftLotteryEV) %>%
  rename(QVLeft = leftQValue, QVRight = rightQValue, EVLeft = leftLotteryEV, EVRight = rightLotteryEV) %>%
  drop_na()

Simulate data for all subjects using the actual stimuli they saw in the experiment

model = "model1c"

ddm_pred_data = data.frame()

for(i in 1:length(subnums)){
  cur_sub = subnums[i]
  cur_stim = all_trials_stim_data %>% filter(subnum == as.numeric(cur_sub))
  cur_pars = ddm_best_sub_ests %>% filter(subnum == cur_sub)
  tmp = sim_task(stimuli = cur_stim, model_name = model, d = cur_pars$d, delta = cur_pars$delta, sigma = cur_pars$sigma)
  tmp$subnum = cur_sub
  ddm_pred_data = rbind.all.columns(ddm_pred_data, tmp)
}

Group level checks

source(paste0(helpers_path, 'optimPostProcess/sim_sanity_checks.R'))
sim_sanity_checks(ddm_pred_data, checks = c(2,3,4,5), compare_rts = TRUE, compare_logits = TRUE, true_data = all_trials_data, yrange_lim = 25)

## Warning in if (large_range) {: the condition has length > 1 and only the first
## element will be used

Subject level checks

ddm_pred_data %>%
  select(EVLeft, EVRight, QVLeft, QVRight, probFractalDraw, choice, reactionTime, subnum) %>%
  mutate(data_type = "sim") %>%
  rbind(all_trials_data %>% 
          mutate(choice = ifelse(choiceLeft == 1, "left", "right"),
                                   data_type = "true") %>%
          rename(QVLeft = leftQValue, QVRight = rightQValue, EVLeft = leftLotteryEV, EVRight = rightLotteryEV) %>%
          select(subnum, EVLeft, EVRight, QVLeft, QVRight, probFractalDraw, choice, reactionTime, data_type)) %>%
  drop_na()%>%
  mutate(probFractalDraw = as.factor(probFractalDraw), 
         log_rt = log(reactionTime),
         subnum = as.factor(as.numeric(subnum))) %>%
  filter(is.finite(log_rt)) %>%
  group_by(subnum, probFractalDraw, data_type) %>%
  summarise(mean_log_rt = mean(log_rt),
            sem_log_rt = sem(log_rt), .groups="keep") %>%
  ggplot(aes(probFractalDraw, mean_log_rt, color=data_type))+
  geom_point()+
  geom_errorbar(aes(ymin = mean_log_rt - sem_log_rt, ymax = mean_log_rt + sem_log_rt), width=.2)+
  labs(title="Inverse U and faster when pFrac ==1?", color="")+
  theme(legend.position = "bottom")+
  facet_wrap(~subnum, scales="free_y")

tmp = ddm_pred_data %>%
  select(subnum, EVLeft, EVRight, QVLeft, QVRight, probFractalDraw, choice, reactionTime) %>%
  mutate(subnum = as.factor(as.numeric(subnum)),
         probFractalDraw = as.factor(probFractalDraw),
         choiceLeft = ifelse(choice == "left", 1, ifelse(choice=="right", 0, NA)),
         EVDiff = EVLeft - EVRight, 
         QVDiff = QVLeft - QVRight) %>%
  nest(data = -c(probFractalDraw, subnum)) %>% 
  mutate(
    fit = map(data, ~ glm(choiceLeft ~ scale(EVDiff) + scale(QVDiff), data = .x, family=binomial(link="logit"))),
    tidied = map(fit, tidy)
  ) %>% 
  unnest(tidied) %>%
  filter(term != "(Intercept)") %>%
  select(subnum, probFractalDraw, term, estimate, std.error) %>%
  mutate(data_type = "sim")

tmp_true = all_trials_data %>% 
  mutate(choice = ifelse(choiceLeft == 1, "left", "right"),
         data_type = "true") %>%
  rename(QVLeft = leftQValue, QVRight = rightQValue, EVLeft = leftLotteryEV, EVRight = rightLotteryEV) %>%
  select(subnum, EVLeft, EVRight, QVLeft, QVRight, probFractalDraw, choice, reactionTime, data_type) %>%
  mutate(subnum = as.factor(as.numeric(subnum)),
         probFractalDraw = as.factor(probFractalDraw),
         choiceLeft = ifelse(choice == "left", 1, ifelse(choice=="right", 0, NA)),
         EVDiff = EVLeft - EVRight, 
         QVDiff = QVLeft - QVRight) %>%
  nest(data = -c(probFractalDraw, subnum)) %>% 
  mutate(
    fit = map(data, ~ glm(choiceLeft ~ scale(EVDiff) + scale(QVDiff), data = .x, family=binomial(link="logit"))),
    tidied = map(fit, tidy)
  ) %>% 
  unnest(tidied) %>%
  filter(term != "(Intercept)") %>%
  select(subnum, probFractalDraw, term, estimate, std.error)%>%
  mutate(data_type = "true")

p = tmp %>%
  rbind(tmp_true) %>%
  filter(subnum %in% c("1", "2", "3", "4")) %>%
  ggplot(aes(probFractalDraw, estimate, col=term, group=term))+
  geom_point()+
  geom_line()+
  geom_errorbar(aes(ymin = estimate - std.error, ymax = estimate +std.error), width=0.2)+
  geom_hline(aes(yintercept=0), linetype="dashed")+
  facet_grid(subnum~data_type, scales="free_y")+
  scale_color_manual(values = cbbPalette[2:1])+
  theme(legend.position = "bottom")+
  labs(color="", title="Relevant attribute effect on choice")+
  ylim(-5, 5)

p

RL + DDM

ddrl_best_sub_ests = ddrl_best_sub_ests %>%
  spread(key, value)
source(paste0(helpers_path, 'ddrlModels/sim_task_sequential.R'))
source(paste0(helpers_path, 'ddrlModels/r_dd_rl_models/ddrl_model1c.R'))
sim_trial_list = list()
sim_trial_list[['model1c']] = sim_trial
all_trials_stim_data = all_trials_data %>%
  select(subnum, probFractalDraw, rightLotteryEV, leftLotteryEV, leftFractalReward, rightFractalReward) %>%
  rename(EVLeft = leftLotteryEV, EVRight = rightLotteryEV) %>%
  drop_na()

Simulate data for all subjects using the actual stimuli they saw in the experiment

model = "model1c"

ddrl_pred_data = data.frame()

for(i in 1:length(subnums)){
  cur_sub = subnums[i]
  cur_stim = all_trials_stim_data %>% filter(subnum == as.numeric(cur_sub))
  cur_pars = ddrl_best_sub_ests %>% filter(subnum == cur_sub)
  tmp = sim_task_sequential(stimuli = cur_stim, model_name = model, d = cur_pars$d, alpha = cur_pars$alpha, delta = cur_pars$delta, sigma = cur_pars$sigma)
  tmp$subnum = cur_sub
  ddrl_pred_data = rbind.all.columns(ddrl_pred_data, tmp)
}

Group level checks

sim_sanity_checks(ddrl_pred_data, checks = c(2,3,4,5), compare_rts = TRUE, compare_logits = TRUE, true_data = all_trials_data, yrange_lim = 25)

## Warning in if (large_range) {: the condition has length > 1 and only the first
## element will be used

Subject level checks

ddrl_pred_data %>%
  select(EVLeft, EVRight, QVLeft, QVRight, probFractalDraw, choice, reactionTime, subnum) %>%
  mutate(data_type = "sim") %>%
  rbind(all_trials_data %>% 
          mutate(choice = ifelse(choiceLeft == 1, "left", "right"),
                                   data_type = "true") %>%
          rename(QVLeft = leftQValue, QVRight = rightQValue, EVLeft = leftLotteryEV, EVRight = rightLotteryEV) %>%
          select(subnum, EVLeft, EVRight, QVLeft, QVRight, probFractalDraw, choice, reactionTime, data_type)) %>%
  drop_na()%>%
  mutate(probFractalDraw = as.factor(probFractalDraw), 
         log_rt = log(reactionTime),
         subnum = as.factor(as.numeric(subnum))) %>%
  filter(is.finite(log_rt)) %>%
  group_by(subnum, probFractalDraw, data_type) %>%
  summarise(mean_log_rt = mean(log_rt),
            sem_log_rt = sem(log_rt), .groups="keep") %>%
  ggplot(aes(probFractalDraw, mean_log_rt, color=data_type))+
  geom_point()+
  geom_errorbar(aes(ymin = mean_log_rt - sem_log_rt, ymax = mean_log_rt + sem_log_rt), width=.2)+
  labs(title="Inverse U and faster when pFrac ==1?", color="")+
  theme(legend.position = "bottom")+
  facet_wrap(~subnum, scales="free_y")

tmp = ddm_pred_data %>%
  select(subnum, EVLeft, EVRight, QVLeft, QVRight, probFractalDraw, choice, reactionTime) %>%
  mutate(subnum = as.factor(as.numeric(subnum)),
         probFractalDraw = as.factor(probFractalDraw),
         choiceLeft = ifelse(choice == "left", 1, ifelse(choice=="right", 0, NA)),
         EVDiff = EVLeft - EVRight, 
         QVDiff = QVLeft - QVRight) %>%
  nest(data = -c(probFractalDraw, subnum)) %>% 
  mutate(
    fit = map(data, ~ glm(choiceLeft ~ scale(EVDiff) + scale(QVDiff), data = .x, family=binomial(link="logit"))),
    tidied = map(fit, tidy)
  ) %>% 
  unnest(tidied) %>%
  filter(term != "(Intercept)") %>%
  select(subnum, probFractalDraw, term, estimate, std.error) %>%
  mutate(data_type = "sim")

p = tmp %>%
  rbind(tmp_true) %>%
  filter(subnum %in% c("1", "2", "3", "4")) %>%
  ggplot(aes(probFractalDraw, estimate, col=term, group=term))+
  geom_point()+
  geom_line()+
  geom_errorbar(aes(ymin = estimate - std.error, ymax = estimate +std.error), width=0.2)+
  geom_hline(aes(yintercept=0), linetype="dashed")+
  facet_grid(subnum~data_type, scales="free_y")+
  scale_color_manual(values = cbbPalette[2:1])+
  theme(legend.position = "bottom")+
  labs(color="", title="Relevant attribute effect on choice")+
  ylim(-5, 5)

p